****************************************************************
*Import Tax Data (noNames)
****************************************************************

********************************************************************
** Input Data
********************************************************************


forvalues ii=2008(1)2017 {
	import delimit "$root/Data/Original/Grunddaten_`ii'.csv",  varnames(1) clear
	rename stj			  						year
	rename pid			  						PID
	rename ziv		 							ZIV
	rename anz_kag				   				ANZ_KAG
	rename von									VON
	rename bis       				  			BIS
	rename hem						 			HEM
	rename hef									HEF
	rename erfolg_s							   	ERFOLG_S
	rename erfolg_l								ERFOLG_L
	rename wsert								WSERT
	rename mietwert								MIETWERT
	rename eink_lieg							EINK_LIEG
	rename tot_eink								TOT_EINK
	rename abz_hypo								ABZ_HYPO
	rename reineink		 						REINEINK
	rename eink_eff_bst							EINK_EFF_BST
	rename eink_satz_bst						EINK_SATZ_BST
	rename wsv									WSV
	rename totverm								TOTVERM
	rename schulden								SCHULDEN
	rename rein_verm							REIN_VERM
	rename verm_eff_bst							VERM_EFF_BST
	rename verm_satz_bst						VERM_SATZ_BST
	rename eink_st_bst							EINK_ST_BST
	rename eink_st_gde							EINK_ST_GDE
	rename eink_st_dbs							EINK_ST_DBS
	rename verm_st_bst							VERM_ST_BST
	rename verm_st_gde							VERM_ST_GDE
	rename lot_st_tot							LOT_ST_TOT
	rename kir_st_dt							KIR_ST_DT
	rename kir_st_ep							KIR_ST_EP
	foreach var of varlist _all {
		destring `var', force replace
		replace `var' = 0 if `var'==.
	}
	collapse (sum) ANZ_KAG HEM HEF ERFOLG_S ERFOLG_L WSERT MIETWERT EINK_LIEG ///
	               TOT_EINK ABZ_HYPO REINEINK EINK_EFF_BST EINK_SATZ_BST WSV  ///
				   TOTVERM SCHULDEN REIN_VERM VERM_EFF_BST VERM_SATZ_BST      ///
				   EINK_ST_BST EINK_ST_GDE EINK_ST_DBS VERM_ST_BST VERM_ST_GDE ///
				   LOT_ST_TOT KIR_ST_DT KIR_ST_EP ///
		     (first) ZIV , by(year PID)
	save "$root/Data/Original/Grunddaten_`ii'_keineNamen.dta", replace
}



use "$root/Data/Original/Grunddaten_2008_keineNamen.dta", clear
forvalues ii=2009(1)2017 {
	append using "$root/Data/Original/Grunddaten_`ii'_keineNamen.dta"
}
save "$root/Data/Original/Grunddaten_2008_2017.dta", replace



**********************************************************************
** Pension
import excel "$root/Data/Original/Renten 2008-2012.xlsx",  firstrow clear
rename STJ year
save "$root/Data/Original/pension_2008_2012.dta", replace

import delimit "$root/Data/Original/Renten 2013-2017.csv",  delimit(";") varnames(1) clear
rename ?stj year
rename pid PID
rename jahrgang_dt JAHRGANG_DT_pension
rename jahrgang_ep JAHRGANG_EP_pension
rename m_ahv_iv M_AHV_IV
rename f_ahv_iv F_AHV_IV
keep year PID JAHRGANG_DT_pension JAHRGANG_EP_pension M_AHV_IV F_AHV_IV
destring _all, replace force
save "$root/Data/Original/pension_2013_2017.dta", replace

use "$root/Data/Original/pension_2008_2012.dta", clear
append using "$root/Data/Original/pension_2013_2017.dta"
collapse (sum) M_AHV_IV F_AHV_IV (max) JAHRGANG_DT_pension JAHRGANG_EP_pension, by(year PID)
save "$root/Data/Original/pension_2008_2017.dta", replace

************************************************************************
** Get Age from Simon/Max Data
* Note: I also use ZIV from this data. However, ZIV would also be in the original
 *      data but it was forgotten and quicker to grab here. maybe change in the future
forvalues ii=2008(1)2014 {
	import delimit "$root/Data/Original/VA_Daten_`ii'.csv", ///
	                         delimit(";") varnames(1) clear
	rename ?steuerjahr year
	rename personen_id PID
	rename ziv ZIV_simon
	rename jahrgang_dossiertraeger JAHRGANG_DT
	rename jahrgang_ehepartner     JAHRGANG_EP
	destring JAHRGANG_EP, replace force
	keep year PID JAHRGANG_DT JAHRGANG_EP ZIV_simon
	collapse (max) JAHRGANG_DT JAHRGANG_EP (first) ZIV_simon, by(year PID)
	tempfile age_`ii'
	save `age_`ii'', replace
}

use `age_2008', clear
forvalues ii=2009(1)2014 {
	append using `age_`ii''
}
save "$root/Data/Original/age_2008_2014.dta", replace


*********************************************************
** Combine all the data

use "$root/Data/Original/Grunddaten_2008_2017.dta", clear

* add building deduction (only available for homeowners with mietwert>5000chf)
merge 1:1 PID year using "$root/Data/Original/building_deduction_2008_2017.dta"
drop if _merge==2
drop _
* add pension
merge 1:1 PID year using "$root/Data/Original/pension_2008_2017.dta"
drop if _merge==2
drop _
* use jahrgang from pension data if missing
replace JAHRGANG_DT = JAHRGANG_DT_pension if JAHRGANG_DT==.
replace JAHRGANG_EP = JAHRGANG_EP_pension if JAHRGANG_EP==.
drop JAHRGANG_DT_pension JAHRGANG_EP_pension
* add age from data simon
merge 1:1 PID year using "$root/Data/Original/age_2008_2014.dta"
drop if _merge==2
drop _
replace ZIV = ZIV_simon if ZIV==.
drop ZIV_simon

* replace missing with zero
foreach var of varlist UNTERHALT_WERTERHALTEND UNTERHALT_WERTVERMEHREND ///
						M_AHV_IV F_AHV_IV {
	replace `var'=0 if `var'==.
}


* expand jahrgang for year 2015+ if household present in 2014
sort PID year
forvalue ii = 1(1)4 {
	replace JAHRGANG_DT = JAHRGANG_DT[_n-1] if PID==PID[_n-1] & JAHRGANG_DT==.
	replace JAHRGANG_EP = JAHRGANG_EP[_n-1] if PID==PID[_n-1] & JAHRGANG_EP==.
}

order year PID JAHRGANG_DT JAHRGANG_EP

save "$root/Data/Original/Grunddaten_all.dta", replace

